file does the following:
• Finds all the characters appearing in the file.
• Counts the frequencies of all the characters.
• Constructs and shows the Huffman code corresponding to the computed
frequencies.
• Encodes your file using the code (Hint: Bitwise operator).
• Computes the compression ratio Output File size / Input File size
In [59]:
def generate_random_text_file(length = 200, debug = False):
"""generate a text file with random string"""
text_file = open("./random_text.txt","w")
import random, string
random_string = ''.join([random.choice(string.ascii_letters + string.digits) for n in xrange(length)])
if debug: print random_string
text_file.write(random_string)
text_file.close()
def load_file_char_freq(path_to_file= "./random_text.txt", debug = False):
"""Load a file, and get frequencies of each character. Out is a dictionary in form {"char": 'freq'}"""
path = raw_input("Relative path to text file. Leave blank for to generate random text: ")
if path is "": generate_random_text_file()
else: path_to_file = path
try: text_file = open(path_to_file,"r")
except:
print "File not found"
raise IOError("No file found with that name")
text = text_file.read()
list_of_chars = list(text)
dict_of_chars= {}
for char in list_of_chars:
try: dict_of_chars[char] +=1
except: dict_of_chars[char] = 1
if debug:
print dict_of_chars
print text
text_file.close()
return dict_of_chars, text
def calculate_compression(old_string, new_string):
"""Calculates ratio, which is nothing but ratio of string lengths. Since huffman stores in binary, we must devided by 8."""
return len(new_string)/len(old_string) / 8.0 * 100.0
def huffman_encode(dict_of_chars, debug = False):
"""Huffman encode the given dict mapping symbols to weights
Leaf is of form [ [freq, [symbol, code]]. Python gives min heap from via Heapify function"""
heap = [[frequency, [char, ""]] for char, frequency in dict_of_chars.items()] #Construct un Heapified heap
heapify(heap) # Python's In place Minheap
if debug: print "Trying to Encode: ", sorted(dict_of_chars.iteritems())
while len(heap) > 1:
small_node = heappop(heap) # Get smallest of element from Min Heap
big_node = heappop(heap) # Get second smallest element
# The idea is to dynamically generate codes. If node is pushed down-left, add a Zero. If pushed to down-right, add a one
for node in small_node[1:]:
node[1] = '0' + node[1]
for node in big_node[1:]:
node[1] = '1' + node[1]
# New node has sum of freq as new freq. All within subtree follow freq in list. Thier order is determined by thier code.
new_node = [small_node[0] + big_node[0]] + small_node[1:] + big_node[1:]
# Push to min heap
heappush(heap, new_node)
#Return all leaves, which are already in list format [ Symbol, code]
return heappop(heap)[1:]
The 'true' Huffman code creator assembles each symbol and its weight into the following structure initially (the leaf structure):
[ weight, [ symbol, []]]
The weight applies to every (in this case only one), of the
[symbol, []]
pairs after it in the same list. The empty list is used to accumulate the Huffman code for the symbol as we manipulate the heap, without having to walk a constructed tree structure.
Source: from Paddy3118 Go deh!: Huffman Encoding in Python http://paddy3118.blogspot.com/2009/03/huffman-encoding-in-python.html#ixzz3XJn3fTAK
In [60]:
from heapq import heappush, heappop, heapify
dict_of_chars, input_string = load_file_char_freq()
codes = huffman_encode(dict_of_chars)
print "\n\nSymbol\t | Frequency\t | Huffman Code"
print "--------------------------------------"
for code in codes:
print "%s \t\t %s \t\t %s" % (code[0], dict_of_chars[code[0]], code[1])
huff_code_dict = {}
for code in codes: huff_code_dict[code[0]] = code[1]
new_string= ''
for symbol in list(input_string):
new_string += huff_code_dict[symbol]
print "\n\n------------- Calculating Compression Ratio--------------------------"
print "Original Char length: ", len(input_string)
print "Original Binary length: ", str(len(input_string) * 8.0)
print "New Binary length: ", len(new_string)
print "Compression Ratio is: ",calculate_compression(input_string, new_string),"%"
There are n players in a tournament. Each player plays against every other player. Thus, there are nC2 matches in the tournament. There are no draw in the matches. We say that player p1 subdued p2 if p1 defeated p2 or p1 defeated some player p3 who defeated p2. Note that p1 and p2 might have subdued each other. A king in the tournament is a player who has subdued every other player
Write a program that given the results of a tournament finds if it has a king, if yes then it should also output a king.
• Model the problem as a graph theoretic problem.
• Decide a good data structure to store the results of the matches.
• The run-time of your program should be O(n^2).
Problem can be easily solved by making players as nodes, and edges as matches. Direction of edge represents outcome of match.
We run BFS from each node. If after BFS all nodes are visited, the Node is a king.
Assumptions:
* A player cannot defeat himself/ herself.
* Graph is rep by Python dictionary, where player is key, adjacency list is value.
In [14]:
from random import randint
def create_edge(tournament, winner, loser):
tournament[winner].append( loser)
return tournament
def generate_random_tournament_data(num_players = 10):
"""Just a simple function that simulates all the matches, randomly, outputs in Dict format"""
i,j=0,0
#Create list of players
players = [ "Player "+str(num) for num in range(num_players)]
tournament_data = {}
for player in players: tournament_data[player] = []
for i in range(len(players)):
for oponent in players[i:]:
if oponent is players[i]: continue
#print players[i], " vs ", oponent
result = randint(0,1)
if result ==0: tournament_data = create_edge(tournament_data, players[i], oponent )
else: tournament_data = create_edge(tournament_data, oponent, players[i] )
return tournament_data
def breadth_first_search(graph, start):
""" We use BFS to verify given player is king"""
visited, queue = set(), [start]
while queue:
vertex = queue.pop(0)
if vertex not in visited:
visited.add(vertex)
queue.extend(graph[vertex] - visited)
return visited
def find_king(graph):
""" Player with maximum outdegree is a king. Runtime O(n^2)"""
max_out_degree=0
king = None
for player in graph.keys():
curr_out_degree = len(graph[player])
if curr_out_degree > max_out_degree:
max_out_degree = curr_out_degree
king = player
return king
In [13]:
num_player = input("Please input number of players in tournament:")
tournament_dict = generate_random_tournament_data(num_player)
print "\n---------------------------------"
print " Player \t | Match WOn Against"
print "---------------------------------"
for player in tournament_dict.keys():
print player, "\t", tournament_dict[player]
tournament_dict[player] = set(tournament_dict[player])
print "\n\n----------------------\n", "Finding Kings\n", "----------------------\n"
king = None
king = find_king(tournament_dict)
# Verifying that king is infact a king, Runtime O(n^2)
visited = breadth_first_search(tournament_dict, king)
if king is None: print "No King"
else:
print king, " is the King with ", len(visited)," subjugations."
In [6]:
Out[6]:
In [5]:
In [5]:
In [ ]: